{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run Simulations\n",
    "\n",
    "Given an encoded dataset of targets and queries, run simulations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import primo.models\n",
    "\n",
    "import cupyck\n",
    "\n",
    "from tqdm.notebook import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "hosts = [\n",
    "    (\"localhost\", 2046),\n",
    "]\n",
    "client = cupyck.Client(hosts)\n",
    "simulator = primo.models.Simulator(client)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_seqs = pd.read_hdf('/tf/primo/data/extended_targets/feature_seqs.h5')\n",
    "query_seqs = pd.read_hdf('/tf/primo/data/queries/feature_seqs.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "pairs = (target_seqs\n",
    " .rename(columns={'FeatureSequence':'target_features'})\n",
    " .assign(query_features = query_seqs.loc['callie_janelle'].FeatureSequence)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4,000 here is just a memory-management batch size so that each progress chunk reports period of time.\n",
    "split_size = 4000\n",
    "nsplits = len(pairs) / split_size\n",
    "splits = np.array_split(pairs, nsplits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5458aa4d677d437b8ffb0e4acf517d42",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(HTML(value=u''), FloatProgress(value=0.0, max=1394.0), HTML(value=u'')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "result_store = pd.HDFStore('/tf/primo/data/simulation/extended_targets/callie_janelle.h5', complevel=9, mode='w')\n",
    "try:\n",
    "    for split in tqdm(splits):\n",
    "        results = simulator.simulate(split)\n",
    "        result_store.append('df', results[['duplex_yield']])\n",
    "finally:\n",
    "    result_store.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15+"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}